%MISVAL Fix the missing values in a dataset
%
%    B = MISVAL(A,VAL)
%    B = A*MISVAL([],VAL)
%
% INPUT
%    A    Dataset, containing NaNs (missing values)
%    VAL  String with substitution option
%         or value used for substitution
%
%    B    Dataset with NaNs substituted
%
% DESCRIPTION
%
% The following values for VAL are possible:
%   'remove'    remove objects (rows) that contain missing values (default)
%   'f-remove'  remove features (columns) that contain missing values
%   'mean'      fill the entries with the mean of their features
%   'c-mean'    fill the entries with the class mean of their features
%   <value>     fill the entries with a fixed constant
%
% SEE ALSO
% DATASETS

% Copyright: D.M.J. Tax, D.M.J.Tax@prtools.org
% Faculty EWI, Delft University of Technology
% P.O. Box 5031, 2600 GA Delft, The Netherlands

function [x,msg] = misval(x,val)

if nargin < 2 | isempty(val), val = 'remove'; end
if nargin < 1 | isempty(x)
	x = mapping(mfilename,val);
	x = setname(x,'missing value')
else
	x = dataset(x);
	[m,k,c] = getsize(x);
	% Where are the offenders?
	I = isnan(x);
	% If there are missing values, go:
	if any(I(:))
		switch val
			case {'remove' 'delete'}
				J = find(sum(I,2)==0);
				x = x(J,:);
				msg = 'Objects with missing values have been removed.';
			case {'f-remove' 'f-delete'}
				J = find(sum(I,1)==0);
				x = x(:,J);
				msg = 'Features with missing values have been removed.';
			case 'mean'
				for i=1:k
					J = ~I(:,i);
					if any(I(:,i)) %is there a missing value in this feature?
						if ~any(J)
							error('Missing value cannot be filled: all values are NaN.');
						end
						mn = mean(x(J,i));
						x(find(I(:,i)),i) = mn;
					end
				end
				msg = 'Missing values have been replaced by the feature mean.';
			case 'c-mean'
				for j=1:c
					L = findnlab(x,j);
					for i=1:k
						J = ~I(L,i);
						if any(I(L,i)) %is there a missing value in this feature for this class?
							if ~any(J)
							 error('Missing value cannot be filled: all values are NaN.');
							end
							mn = mean(x(J,i));
							x(find(I(:,i)),i) = mn;
						end
					end
				end
				msg = 'Missing values have been replaced by the class feature mean.';
			otherwise
				if isstr(val)
					error('unknown option')
				end
				if ~isa(val,'double')
					error('Missing values can only be filled by scalars.');
				end
				x(I) = val;
				msg = sprintf('Missing values have been replaced by %f.',val);
		end
	end
end
return
